{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import json\n",
    "import torch\n",
    "from tqdm import tqdm\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "dataset_name = 'webqsp'\n",
    "with open(f\"gpt_labeled_{dataset_name}_raw.jsonl\") as f:\n",
    "    data = [json.loads(line) for line in f]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from strings to tuples of triplets\n",
    "def get_tuples(input):\n",
    "    trunc_cnt = 0\n",
    "    pattern = r'(.*?)(,.*?\\..*?\\..*?,)(.*)'\n",
    "\n",
    "    output_evi = []\n",
    "    for evi in input:\n",
    "        match = re.search(pattern, evi)\n",
    "        if match is None or match.lastindex < 3:\n",
    "            trunc_cnt += 1\n",
    "        else:\n",
    "            output_evi.append((match.group(1).replace('(', '').strip(), match.group(2)[1:-1].strip(), match.group(3).replace(')', '').strip()))\n",
    "    return output_evi"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1628/1628 [00:01<00:00, 1265.89it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2175 22186 0.0980347967186514\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "non_exist_cnt = 0\n",
    "total_cnt = 0\n",
    "pattern = r'(.*?)(,.*?\\..*?\\..*?,)(.*)'\n",
    "num_evi = []\n",
    "for idx, each in enumerate(tqdm(data)):\n",
    "    input_evi = each['user_query'].split('Question')[0].split('\\n')[1:-2]\n",
    "    input_evi = get_tuples(input_evi)\n",
    "\n",
    "    output = each['prediction'].split('\\n')\n",
    "    output = [evi for evi in output if 'evidence:' in evi]\n",
    "    output = [evi[evi.find('evidence:') + len('evidence:') + 1 :].strip() for evi in output]\n",
    "\n",
    "    output_evi = get_tuples(output)\n",
    "    good_evi = []\n",
    "    for each_evi in output_evi:\n",
    "        total_cnt += 1\n",
    "\n",
    "        # two directions both count. in some cases, gpt will correct the order\n",
    "        if (each_evi in input_evi or tuple(reversed(each_evi)) in input_evi):\n",
    "            good_evi.append(each_evi)\n",
    "        else:\n",
    "            non_exist_cnt += 1\n",
    "            # if non_exist_cnt == 100:\n",
    "                # raise ValueError(f'{each_evi} not in {input_evi}')\n",
    "\n",
    "    data[idx]['gpt_labeled_evi'] = good_evi\n",
    "    num_evi.append(len(output_evi))\n",
    "\n",
    "print(non_exist_cnt, total_cnt, non_exist_cnt / total_cnt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 1628/1628 [00:00<00:00, 797701.74it/s]\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAk00lEQVR4nO3df2xV9eH/8Vd/0CsF7u0K9N52tIi/gApFV7Hc+OPDpKOUjuGsiT8Y1I1IZLdm0M1BFwaim+1wmahD2LJFXGKHYkRDHWAtUuYsCNWGX9oIwRXT3tZJuBfqKNCe7x9+OdkVFG7p7X338nwkJ+k9533vfd+3xD5z7rm3cZZlWQIAADBIfLQnAAAA8FUECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjJEZ7Aj3R3d2tlpYWDRkyRHFxcdGeDgAAuAiWZen48ePKyMhQfPw3nyPpl4HS0tKizMzMaE8DAAD0wJEjRzRixIhvHNMvA2XIkCGSvnyBTqczyrMBAAAXIxgMKjMz0/49/k36ZaCcfVvH6XQSKAAA9DMXc3kGF8kCAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4idGeQCy7cvEbFxzzSWVRH8wEAID+hTMoAADAOAQKAAAwDoECAACMQ6AAAADjhBUoq1evVk5OjpxOp5xOp7xerzZt2mQfnzx5suLi4kK2hx56KOQxmpubVVRUpOTkZKWlpemRRx7RmTNneufVAACAmBDWp3hGjBihyspKXXvttbIsSy+88IJmzpypDz74QNdff70k6cEHH9Rjjz1m3yc5Odn+uaurS0VFRfJ4PHr33XfV2tqqOXPmaMCAAXriiSd66SUBAID+LqxAmTFjRsjt3/72t1q9erV27NhhB0pycrI8Hs957//mm2/qwIEDeuutt+R2u3XDDTfo8ccf16JFi/Too48qKSmphy8DAADEkh5fg9LV1aV169apo6NDXq/X3v/iiy9q2LBhGjdunMrLy/XFF1/Yx+rr6zV+/Hi53W57X0FBgYLBoPbv3/+1z9XZ2algMBiyAQCA2BX2F7Xt3btXXq9XJ0+e1ODBg7VhwwZlZ2dLku6//36NHDlSGRkZ2rNnjxYtWqSmpia9+uqrkiS/3x8SJ5Ls236//2ufs6KiQsuXLw93qgAAoJ8KO1BGjx6txsZGBQIBvfLKKyopKVFdXZ2ys7M1b948e9z48eOVnp6uKVOm6NChQ7r66qt7PMny8nKVlZXZt4PBoDIzM3v8eAAAwGxhv8WTlJSka665Rrm5uaqoqNCECRP09NNPn3dsXl6eJOngwYOSJI/Ho7a2tpAxZ29/3XUrkuRwOOxPDp3dAABA7Lrk70Hp7u5WZ2fneY81NjZKktLT0yVJXq9Xe/fuVXt7uz2mpqZGTqfTfpsIAAAgrLd4ysvLVVhYqKysLB0/flxVVVXatm2btmzZokOHDqmqqkrTp0/X0KFDtWfPHi1cuFC33367cnJyJElTp05Vdna2Zs+erRUrVsjv92vJkiXy+XxyOBwReYEAAKD/CStQ2tvbNWfOHLW2tsrlciknJ0dbtmzR9773PR05ckRvvfWWVq5cqY6ODmVmZqq4uFhLliyx75+QkKDq6mrNnz9fXq9XgwYNUklJScj3pgAAAMRZlmVFexLhCgaDcrlcCgQCRl+PcuXiNy445pPKoj6YCQAA0RfO72/+Fg8AADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIwTVqCsXr1aOTk5cjqdcjqd8nq92rRpk3385MmT8vl8Gjp0qAYPHqzi4mK1tbWFPEZzc7OKioqUnJystLQ0PfLIIzpz5kzvvBoAABATwgqUESNGqLKyUg0NDdq9e7fuuOMOzZw5U/v375ckLVy4UBs3btT69etVV1enlpYW3XXXXfb9u7q6VFRUpFOnTundd9/VCy+8oLVr12rp0qW9+6oAAEC/FmdZlnUpD5Camqonn3xSd999t4YPH66qqirdfffdkqSPPvpIY8eOVX19vSZNmqRNmzbp+9//vlpaWuR2uyVJa9as0aJFi/TZZ58pKSnpop4zGAzK5XIpEAjI6XReyvQj6srFb1xwzCeVRX0wEwAAoi+c3989vgalq6tL69atU0dHh7xerxoaGnT69Gnl5+fbY8aMGaOsrCzV19dLkurr6zV+/Hg7TiSpoKBAwWDQPgtzPp2dnQoGgyEbAACIXWEHyt69ezV48GA5HA499NBD2rBhg7Kzs+X3+5WUlKSUlJSQ8W63W36/X5Lk9/tD4uTs8bPHvk5FRYVcLpe9ZWZmhjttAADQj4QdKKNHj1ZjY6N27typ+fPnq6SkRAcOHIjE3Gzl5eUKBAL2duTIkYg+HwAAiK7EcO+QlJSka665RpKUm5urXbt26emnn9Y999yjU6dO6dixYyFnUdra2uTxeCRJHo9H7733Xsjjnf2Uz9kx5+NwOORwOMKdKgAA6Kcu+XtQuru71dnZqdzcXA0YMEC1tbX2saamJjU3N8vr9UqSvF6v9u7dq/b2dntMTU2NnE6nsrOzL3UqAAAgRoR1BqW8vFyFhYXKysrS8ePHVVVVpW3btmnLli1yuVyaO3euysrKlJqaKqfTqYcfflher1eTJk2SJE2dOlXZ2dmaPXu2VqxYIb/fryVLlsjn83GGBAAA2MIKlPb2ds2ZM0etra1yuVzKycnRli1b9L3vfU+S9NRTTyk+Pl7FxcXq7OxUQUGBnnvuOfv+CQkJqq6u1vz58+X1ejVo0CCVlJToscce691XBQAA+rVL/h6UaOB7UAAA6H/65HtQAAAAIoVAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxgkrUCoqKjRx4kQNGTJEaWlpuvPOO9XU1BQyZvLkyYqLiwvZHnrooZAxzc3NKioqUnJystLS0vTII4/ozJkzl/5qAABATEgMZ3BdXZ18Pp8mTpyoM2fO6Fe/+pWmTp2qAwcOaNCgQfa4Bx98UI899ph9Ozk52f65q6tLRUVF8ng8evfdd9Xa2qo5c+ZowIABeuKJJ3rhJQEAgP4urEDZvHlzyO21a9cqLS1NDQ0Nuv322+39ycnJ8ng8532MN998UwcOHNBbb70lt9utG264QY8//rgWLVqkRx99VElJST14GQAAIJZc0jUogUBAkpSamhqy/8UXX9SwYcM0btw4lZeX64svvrCP1dfXa/z48XK73fa+goICBYNB7d+//7zP09nZqWAwGLIBAIDYFdYZlP/V3d2tBQsW6JZbbtG4cePs/ffff79GjhypjIwM7dmzR4sWLVJTU5NeffVVSZLf7w+JE0n2bb/ff97nqqio0PLly3s6VQAA0M/0OFB8Pp/27dund955J2T/vHnz7J/Hjx+v9PR0TZkyRYcOHdLVV1/do+cqLy9XWVmZfTsYDCozM7NnEwcAAMbr0Vs8paWlqq6u1ttvv60RI0Z849i8vDxJ0sGDByVJHo9HbW1tIWPO3v6661YcDoecTmfIBgAAYldYgWJZlkpLS7VhwwZt3bpVo0aNuuB9GhsbJUnp6emSJK/Xq71796q9vd0eU1NTI6fTqezs7HCmAwAAYlRYb/H4fD5VVVXp9ddf15AhQ+xrRlwulwYOHKhDhw6pqqpK06dP19ChQ7Vnzx4tXLhQt99+u3JyciRJU6dOVXZ2tmbPnq0VK1bI7/dryZIl8vl8cjgcvf8KAQBAvxPWGZTVq1crEAho8uTJSk9Pt7eXXnpJkpSUlKS33npLU6dO1ZgxY/Tzn/9cxcXF2rhxo/0YCQkJqq6uVkJCgrxer370ox9pzpw5Id+bAgAALm9hnUGxLOsbj2dmZqquru6CjzNy5Ej94x//COepAQDAZYS/xQMAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjJEZ7AriwKxe/cVHjPqksivBMAADoG5xBAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHL2qLsov9EjYAAC4nnEEBAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHHCCpSKigpNnDhRQ4YMUVpamu688041NTWFjDl58qR8Pp+GDh2qwYMHq7i4WG1tbSFjmpubVVRUpOTkZKWlpemRRx7RmTNnLv3VAACAmBBWoNTV1cnn82nHjh2qqanR6dOnNXXqVHV0dNhjFi5cqI0bN2r9+vWqq6tTS0uL7rrrLvt4V1eXioqKdOrUKb377rt64YUXtHbtWi1durT3XhUAAOjX4izLsnp6588++0xpaWmqq6vT7bffrkAgoOHDh6uqqkp33323JOmjjz7S2LFjVV9fr0mTJmnTpk36/ve/r5aWFrndbknSmjVrtGjRIn322WdKSkq64PMGg0G5XC4FAgE5nc6eTj/i+vpL2D6pLOrT5wMAIBzh/P6+pGtQAoGAJCk1NVWS1NDQoNOnTys/P98eM2bMGGVlZam+vl6SVF9fr/Hjx9txIkkFBQUKBoPav3//eZ+ns7NTwWAwZAMAALGrx4HS3d2tBQsW6JZbbtG4ceMkSX6/X0lJSUpJSQkZ63a75ff77TH/Gydnj589dj4VFRVyuVz2lpmZ2dNpAwCAfqDHgeLz+bRv3z6tW7euN+dzXuXl5QoEAvZ25MiRiD8nAACInh79scDS0lJVV1dr+/btGjFihL3f4/Ho1KlTOnbsWMhZlLa2Nnk8HnvMe++9F/J4Zz/lc3bMVzkcDjkcjp5MFQAA9ENhnUGxLEulpaXasGGDtm7dqlGjRoUcz83N1YABA1RbW2vva2pqUnNzs7xeryTJ6/Vq7969am9vt8fU1NTI6XQqOzv7Ul4LAACIEWGdQfH5fKqqqtLrr7+uIUOG2NeMuFwuDRw4UC6XS3PnzlVZWZlSU1PldDr18MMPy+v1atKkSZKkqVOnKjs7W7Nnz9aKFSvk9/u1ZMkS+Xw+zpIAAABJYQbK6tWrJUmTJ08O2f/888/rgQcekCQ99dRTio+PV3FxsTo7O1VQUKDnnnvOHpuQkKDq6mrNnz9fXq9XgwYNUklJiR577LFLeyUAACBmXNL3oEQL34NyfnwPCgDAZH32PSgAAACRQKAAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjhB0o27dv14wZM5SRkaG4uDi99tprIccfeOABxcXFhWzTpk0LGXP06FHNmjVLTqdTKSkpmjt3rk6cOHFJLwQAAMSOsAOlo6NDEyZM0KpVq752zLRp09Ta2mpvf//730OOz5o1S/v371dNTY2qq6u1fft2zZs3L/zZAwCAmJQY7h0KCwtVWFj4jWMcDoc8Hs95j3344YfavHmzdu3apZtuukmS9Oyzz2r69On6/e9/r4yMjHCnBAAAYkxErkHZtm2b0tLSNHr0aM2fP1+ff/65fay+vl4pKSl2nEhSfn6+4uPjtXPnzvM+Xmdnp4LBYMgGAABiV68HyrRp0/S3v/1NtbW1+t3vfqe6ujoVFhaqq6tLkuT3+5WWlhZyn8TERKWmpsrv95/3MSsqKuRyuewtMzOzt6cNAAAMEvZbPBdy77332j+PHz9eOTk5uvrqq7Vt2zZNmTKlR49ZXl6usrIy+3YwGCRSAACIYRH/mPFVV12lYcOG6eDBg5Ikj8ej9vb2kDFnzpzR0aNHv/a6FYfDIafTGbIBAIDYFfFA+fTTT/X5558rPT1dkuT1enXs2DE1NDTYY7Zu3aru7m7l5eVFejoAAKAfCPstnhMnTthnQyTp8OHDamxsVGpqqlJTU7V8+XIVFxfL4/Ho0KFD+uUvf6lrrrlGBQUFkqSxY8dq2rRpevDBB7VmzRqdPn1apaWluvfee/kEDwAAkNSDMyi7d+/WjTfeqBtvvFGSVFZWphtvvFFLly5VQkKC9uzZox/84Ae67rrrNHfuXOXm5uqf//ynHA6H/RgvvviixowZoylTpmj69Om69dZb9ec//7n3XhUAAOjXwj6DMnnyZFmW9bXHt2zZcsHHSE1NVVVVVbhPDQAALhP8LR4AAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcRKjPQH0rSsXv3HBMZ9UFvXBTAAA+HqcQQEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxgk7ULZv364ZM2YoIyNDcXFxeu2110KOW5alpUuXKj09XQMHDlR+fr4+/vjjkDFHjx7VrFmz5HQ6lZKSorlz5+rEiROX9EIAAEDsCDtQOjo6NGHCBK1ateq8x1esWKFnnnlGa9as0c6dOzVo0CAVFBTo5MmT9phZs2Zp//79qqmpUXV1tbZv36558+b1/FUAAICYkhjuHQoLC1VYWHjeY5ZlaeXKlVqyZIlmzpwpSfrb3/4mt9ut1157Tffee68+/PBDbd68Wbt27dJNN90kSXr22Wc1ffp0/f73v1dGRsYlvBwAABALevUalMOHD8vv9ys/P9/e53K5lJeXp/r6eklSfX29UlJS7DiRpPz8fMXHx2vnzp3nfdzOzk4Fg8GQDQAAxK5eDRS/3y9JcrvdIfvdbrd9zO/3Ky0tLeR4YmKiUlNT7TFfVVFRIZfLZW+ZmZm9OW0AAGCYfvEpnvLycgUCAXs7cuRItKcEAAAiqFcDxePxSJLa2tpC9re1tdnHPB6P2tvbQ46fOXNGR48etcd8lcPhkNPpDNkAAEDs6tVAGTVqlDwej2pra+19wWBQO3fulNfrlSR5vV4dO3ZMDQ0N9pitW7equ7tbeXl5vTkdAADQT4X9KZ4TJ07o4MGD9u3Dhw+rsbFRqampysrK0oIFC/Sb3/xG1157rUaNGqVf//rXysjI0J133ilJGjt2rKZNm6YHH3xQa9as0enTp1VaWqp7772XT/AAAABJPQiU3bt367vf/a59u6ysTJJUUlKitWvX6pe//KU6Ojo0b948HTt2TLfeeqs2b96sK664wr7Piy++qNLSUk2ZMkXx8fEqLi7WM8880wsvBwAAxII4y7KsaE8iXMFgUC6XS4FAwOjrUa5c/EafPt8nlUUXHHMxc7qYxwEAIFzh/P7uF5/iAQAAlxcCBQAAGIdAAQAAxiFQAACAccL+FA/M1dcX5QIAECmcQQEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGSYz2BPqrKxe/Ee0pAAAQsziDAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMw6d40CMX8ymmTyqL+mAmAIBYxBkUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGCcXg+URx99VHFxcSHbmDFj7OMnT56Uz+fT0KFDNXjwYBUXF6utra23pwEAAPqxiJxBuf7669Xa2mpv77zzjn1s4cKF2rhxo9avX6+6ujq1tLTorrvuisQ0AABAPxWR70FJTEyUx+M5Z38gENBf//pXVVVV6Y477pAkPf/88xo7dqx27NihSZMmRWI6AACgn4nIGZSPP/5YGRkZuuqqqzRr1iw1NzdLkhoaGnT69Gnl5+fbY8eMGaOsrCzV19d/7eN1dnYqGAyGbAAAIHb1eqDk5eVp7dq12rx5s1avXq3Dhw/rtttu0/Hjx+X3+5WUlKSUlJSQ+7jdbvn9/q99zIqKCrlcLnvLzMzs7WkDAACD9PpbPIWFhfbPOTk5ysvL08iRI/Xyyy9r4MCBPXrM8vJylZWV2beDwSCRAgBADIv4x4xTUlJ03XXX6eDBg/J4PDp16pSOHTsWMqatre2816yc5XA45HQ6QzYAABC7Iv7HAk+cOKFDhw5p9uzZys3N1YABA1RbW6vi4mJJUlNTk5qbm+X1eiM9FVyki/lDgAAARFKvB8ovfvELzZgxQyNHjlRLS4uWLVumhIQE3XfffXK5XJo7d67KysqUmpoqp9Ophx9+WF6vl0/wAAAAW68Hyqeffqr77rtPn3/+uYYPH65bb71VO3bs0PDhwyVJTz31lOLj41VcXKzOzk4VFBToueee6+1pAACAfizOsiwr2pMIVzAYlMvlUiAQiNr1KLwNcmGfVBZFewoAAIOE8/s74tegABdyMbFH7ADA5YU/FggAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4/C0eRAx/UBEA0FOcQQEAAMYhUAAAgHF4iwcx42LeUvqksqgPZgIAuFScQQEAAMYhUAAAgHEIFAAAYByuQTkPPh5rHv6bAMDlhTMoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4fIoH+IqL/cQQ30oLAJFDoAARxNfvA0DP8BYPAAAwDmdQcFnpzS9848vjACByOIMCAACMwxkU4DLDdTEA+gMCBcA5euvtq/4aOnySC4g+AgXoBzjrAeBywzUoAADAOJxBAaKMTwMBwLkIFCBGmBg6Jr41xToB/QOBAgA9RFgAkUOgAECM4NNHiCVRDZRVq1bpySeflN/v14QJE/Tss8/q5ptvjuaUAPSx3nzLJZZ/8Zr41lQs4+xY9EUtUF566SWVlZVpzZo1ysvL08qVK1VQUKCmpialpaVFa1oA+jETf4mbOKfe0lu/xHvzzE8sh8Xl9v1EcZZlWdF44ry8PE2cOFF//OMfJUnd3d3KzMzUww8/rMWLF3/jfYPBoFwulwKBgJxOZ6/PLZb/hwIAl7v+Gjp9/bspEq8vnN/fUTmDcurUKTU0NKi8vNzeFx8fr/z8fNXX158zvrOzU52dnfbtQCAg6csXGgndnV9E5HEBANGXtXB9rzzOxf4OGrdsywXH7FtecMExff27KRK/Y88+5sWcG4lKoPznP/9RV1eX3G53yH63262PPvronPEVFRVavnz5OfszMzMjNkcAAL6Ja6WZj9VbIjmn48ePy+VyfeOYfvEpnvLycpWVldm3u7u7dfToUQ0dOlRxcXG9+lzBYFCZmZk6cuRIRN4+QijWu2+x3n2L9e5brHff6sl6W5al48ePKyMj44JjoxIow4YNU0JCgtra2kL2t7W1yePxnDPe4XDI4XCE7EtJSYnkFOV0OvkH3odY777Fevct1rtvsd59K9z1vtCZk7Oi8rd4kpKSlJubq9raWntfd3e3amtr5fV6ozElAABgkKi9xVNWVqaSkhLddNNNuvnmm7Vy5Up1dHToxz/+cbSmBAAADBG1QLnnnnv02WefaenSpfL7/brhhhu0efPmcy6c7WsOh0PLli075y0lRAbr3bdY777Fevct1rtvRXq9o/Y9KAAAAF8nKtegAAAAfBMCBQAAGIdAAQAAxiFQAACAcQiU/7Fq1SpdeeWVuuKKK5SXl6f33nsv2lOKCdu3b9eMGTOUkZGhuLg4vfbaayHHLcvS0qVLlZ6eroEDByo/P18ff/xxdCYbAyoqKjRx4kQNGTJEaWlpuvPOO9XU1BQy5uTJk/L5fBo6dKgGDx6s4uLic744ERdn9erVysnJsb+syuv1atOmTfZx1jqyKisrFRcXpwULFtj7WPPe8+ijjyouLi5kGzNmjH08kmtNoPx/L730ksrKyrRs2TK9//77mjBhggoKCtTe3h7tqfV7HR0dmjBhglatWnXe4ytWrNAzzzyjNWvWaOfOnRo0aJAKCgp08uTJPp5pbKirq5PP59OOHTtUU1Oj06dPa+rUqero6LDHLFy4UBs3btT69etVV1enlpYW3XXXXVGcdf81YsQIVVZWqqGhQbt379Ydd9yhmTNnav/+/ZJY60jatWuX/vSnPyknJydkP2veu66//nq1trba2zvvvGMfi+haW7Asy7Juvvlmy+fz2be7urqsjIwMq6KiIoqzij2SrA0bNti3u7u7LY/HYz355JP2vmPHjlkOh8P6+9//HoUZxp729nZLklVXV2dZ1pfrO2DAAGv9+vX2mA8//NCSZNXX10drmjHlW9/6lvWXv/yFtY6g48ePW9dee61VU1Nj/d///Z/1s5/9zLIs/n33tmXLllkTJkw477FIrzVnUCSdOnVKDQ0Nys/Pt/fFx8crPz9f9fX1UZxZ7Dt8+LD8fn/I2rtcLuXl5bH2vSQQCEiSUlNTJUkNDQ06ffp0yJqPGTNGWVlZrPkl6urq0rp169TR0SGv18taR5DP51NRUVHI2kr8+46Ejz/+WBkZGbrqqqs0a9YsNTc3S4r8WveLv2Ycaf/5z3/U1dV1zrfYut1uffTRR1Ga1eXB7/dL0nnX/uwx9Fx3d7cWLFigW265RePGjZP05ZonJSWd8wc3WfOe27t3r7xer06ePKnBgwdrw4YNys7OVmNjI2sdAevWrdP777+vXbt2nXOMf9+9Ky8vT2vXrtXo0aPV2tqq5cuX67bbbtO+ffsivtYEChDDfD6f9u3bF/KeMXrf6NGj1djYqEAgoFdeeUUlJSWqq6uL9rRi0pEjR/Szn/1MNTU1uuKKK6I9nZhXWFho/5yTk6O8vDyNHDlSL7/8sgYOHBjR5+YtHknDhg1TQkLCOVcet7W1yePxRGlWl4ez68va977S0lJVV1fr7bff1ogRI+z9Ho9Hp06d0rFjx0LGs+Y9l5SUpGuuuUa5ubmqqKjQhAkT9PTTT7PWEdDQ0KD29nZ95zvfUWJiohITE1VXV6dnnnlGiYmJcrvdrHkEpaSk6LrrrtPBgwcj/u+bQNGX/3PJzc1VbW2tva+7u1u1tbXyer1RnFnsGzVqlDweT8jaB4NB7dy5k7XvIcuyVFpaqg0bNmjr1q0aNWpUyPHc3FwNGDAgZM2bmprU3NzMmveS7u5udXZ2stYRMGXKFO3du1eNjY32dtNNN2nWrFn2z6x55Jw4cUKHDh1Senp65P99X/JltjFi3bp1lsPhsNauXWsdOHDAmjdvnpWSkmL5/f5oT63fO378uPXBBx9YH3zwgSXJ+sMf/mB98MEH1r///W/LsiyrsrLSSklJsV5//XVrz5491syZM61Ro0ZZ//3vf6M88/5p/vz5lsvlsrZt22a1trba2xdffGGPeeihh6ysrCxr69at1u7duy2v12t5vd4ozrr/Wrx4sVVXV2cdPnzY2rNnj7V48WIrLi7OevPNNy3LYq37wv9+iseyWPPe9POf/9zatm2bdfjwYetf//qXlZ+fbw0bNsxqb2+3LCuya02g/I9nn33WysrKspKSkqybb77Z2rFjR7SnFBPefvttS9I5W0lJiWVZX37U+Ne//rXldrsth8NhTZkyxWpqaorupPux8621JOv555+3x/z3v/+1fvrTn1rf+ta3rOTkZOuHP/yh1draGr1J92M/+clPrJEjR1pJSUnW8OHDrSlTpthxYlmsdV/4aqCw5r3nnnvusdLT062kpCTr29/+tnXPPfdYBw8etI9Hcq3jLMuyLv08DAAAQO/hGhQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBx/h+WCsCWHgfhdwAAAABJRU5ErkJggg==",
      "text/plain": [
       "<Figure size 640x480 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# for samples GPT provides more than 20 evidences, perhaps remove them\n",
    "for idx, each in enumerate(tqdm(data)):\n",
    "    if len(data[idx]['gpt_labeled_evi']) > 20:\n",
    "        data[idx]['gpt_labeled_evi'] = []\n",
    "fig = plt.hist(num_evi, bins=range(0, 50))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1411 1628 0.8667076167076168\n"
     ]
    }
   ],
   "source": [
    "# count coverage of samples that GPT provides valid evidences\n",
    "cnt = 0\n",
    "for each in data:\n",
    "    if len(each['gpt_labeled_evi']) > 0:\n",
    "        cnt += 1\n",
    "print(cnt, len(data), cnt / len(data))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "res = {}\n",
    "for each in data:\n",
    "    res[each['id']] = each['gpt_labeled_evi']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "torch.save(res, f'gpt_labeled_{dataset_name}_cleaned.pt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# res['WebQTest-832_c334509bb5e02cacae1ba2e80c176499']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "cuda121",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
